load(file = '~/Dropbox/Media Capture/BJPS/Empirical/Data/fotp_rsf_vdem.Rda')

library(plm) # fixed effects regressions
library(modelsummary) # tables and plots

## "fotp" is "Freedom of the Press" from Freedom House, the main dependent 
## variable. "lfotp" is "fotp" lagged one year. "pen" is Internet penetration 
## data from ITU. "checks,", "gdp," and "population" are the number of 
## checks and balances, gdp per capita, and population respectively.  

##########################     SUMMARY TABLES    ############################

## Table 1 in the Appendix. 
datasummary(
  (`Internet penetration` = pen) + 
    (`Freedom of the Press` = fotp) + 
    (`Press Freedom Index` = rsf) + 
    (`Media self-censorship` = v2meslfcen) ~ 
    Mean + (`Std. Dev.` = SD) + (`Min` = P0) + (`1st` = P25) + 
    (`2nd` = P50) + (`3rd` = P75) + (`Max` = P100), data = mer,
  title = "Mean, standard deviation, and the quartiles of variables.
  \\label{table:summary} ")

##########################      REGRESSIONS      ############################

## Regression formulas to estimate
smp.reg <- I(100 - fotp) ~ pen
main.reg <- I(100 - fotp) ~ pen + checks + I(log(gdp)) + I(log(population))
lagged.reg <- I(100 - fotp) ~ pen + checks + I(log(gdp)) + I(log(population)) +
  lfotp 
lagged.smp <- I(100 - fotp) ~ pen + lfotp

## Regressions
tw <- plm(main.reg, effect = "twoways", mer, model = "within")
smp.tw <- plm(smp.reg, effect = "twoways", mer, model = "within")
cntry <- plm(main.reg, effect = "individual", mer, model = "within")
smp.cntry <- plm(smp.reg, effect = "individual", mer, model = "within")
ldv.cntry <- plm(lagged.reg, effect = "individual", mer, model = "within")
lsmp.cntry <- plm(lagged.smp, effect = "individual", mer, model = "within")


########################     SUBGROUP ANALYSES    ###########################


## "polity2" is the composite Polity index from the Polity IV dataset.

## Using polity2 for FE regressions by splitting the dataset to countries with
## Polity scores less or greater than 6.
lo.plty <- plm(main.reg, effect = "twoways", model = "within",
               data = mer[mer$polity2.2000 <= 6 & !is.na(mer$polity2.2000), ])
hi.plty <- plm(main.reg, effect = "twoways", model = "within",
               data = mer[mer$polity2.2000 > 6 & !is.na(mer$polity2.2000), ])


## "sta_2000" is countries' Press Freedom Status in 2000 according to 
## Freedom House.

## Using sta_2000 for FE regressions by splitting the dataset to countries with
## a Press Freedom status of "Partly Free," "Free," or "Not Free."
pf <- plm(main.reg, effect = "twoways", 
          data = pdata.frame(mer[mer$sta_2000 == "pf",]), model = "within")
f <- plm(main.reg, effect = "twoways", 
         data = pdata.frame(mer[mer$sta_2000 == "f",]), model = "within")
nf <- plm(main.reg, effect = "twoways", 
          data = pdata.frame(mer[mer$sta_2000 == "nf",]), model = "within")


###############################     RSF    ##################################

## "rsf" is the "Press Freedom Index" by Reporters Without Borders. "lrsf" is 
## the same except lagged one year.

## Regression formulas to estimate
smp.reg.2 <- I(100 - rsf) ~ pen
main.reg.2 <- I(100 - rsf) ~ pen + checks + I(log(gdp)) + I(log(population))
lagged.reg.2 <- I(100 - rsf) ~ pen + checks + I(log(gdp)) + 
  I(log(population)) + lrsf 
lagged.smp.2 <- I(100 - rsf) ~ pen + lrsf

## Regressions
tw.2 <- plm(main.reg.2, effect = "twoways", mer, model = "within")
smp.tw.2 <- plm(smp.reg.2, effect = "twoways", mer, model = "within")
cntry.2 <- plm(main.reg.2, effect = "individual", mer, model = "within")
smp.cntry.2 <- plm(smp.reg.2, effect = "individual", mer, model = "within")
ldv.cntry.2 <- plm(lagged.reg.2, effect = "individual", mer, model = "within")
lsmp.cntry.2 <- plm(lagged.smp.2, effect = "individual", mer, model = "within")


##############################     V-Dem    #################################

## "v2meslfcen" is the "Media Self-censorship Index" by V-Dem. "lv2meslfcen" is
## the same except lagged one year.

## Regression formulas to estimate
smp.reg.3 <- I(v2meslfcen) ~ pen
main.reg.3 <- I(v2meslfcen) ~ pen + checks + I(log(gdp)) + I(log(population))
lagged.reg.3 <- I(v2meslfcen) ~ pen + checks + I(log(gdp)) + 
  I(log(population)) + lv2meslfcen 
lagged.smp.3 <- I(v2meslfcen) ~ pen + lv2meslfcen

## Regressions
tw.3 <- plm(main.reg.3, effect = "twoways", mer, model = "within")
smp.tw.3 <- plm(smp.reg.3, effect = "twoways", mer, model = "within")
cntry.3 <- plm(main.reg.3, effect = "individual", mer, model = "within")
smp.cntry.3 <- plm(smp.reg.3, effect = "individual", mer, model = "within")
ldv.cntry.3 <- plm(lagged.reg.3, effect = "individual", mer, model = "within")
lsmp.cntry.3 <- plm(lagged.smp.3, effect = "individual", mer, model = "within")


#########################     REGRESSION TABLES    ##########################


coefmap <- c("pen" = "Internet penetration", "checks" = "Checks", 
             "I(log(gdp))" = "$\\ln$(GDP per capita)", 
             "I(log(population))" = "$\\ln$(Population)")

yes <- "$\\checkmark$"

star <- c('*' = .1, '**' = .05, '***' = .01) 

note <- "Standard errors are clustered at the country level."

## Table 2 in the Appendix.
modelsummary(
  list(smp.cntry, cntry, smp.tw, tw, lsmp.cntry, ldv.cntry), escape = F, 
  vcov = "robust", stars = star, gof_omit = "Std. Errors", notes = note,
  coef_map = c(coefmap, "lfotp" = "Lagged DV"),
  add_rows = as.data.frame(matrix(nrow = 3, byrow = T, c(
    "Country FE", yes, yes, yes, yes, yes, yes, 
    "Year FE", "", "", yes, yes, "", "",
    "Lagged DV", "", "", "", "", yes, yes
  ))),
  title = "There is a robust negative association between internet penetration 
  and press freedom scores. \\label{table:reg_main}")

## Table 3 in the Appendix.
modelsummary(
  list("Free" = f, "Partly Free" = pf, "Not Free" = nf, "Polity > 6" = hi.plty, 
       "Polity $\\leq$ 6" = lo.plty), escape = F, vcov = "robust", stars = star,
  gof_omit = "Std. Errors", coef_map = coefmap, notes = note,
  add_rows = as.data.frame(matrix(nrow = 2, byrow = T, c(
    "Country FE", yes, yes, yes, yes, yes,
    "Year FE", yes, yes, yes, yes, yes
  ))),
  title = "Subgroup analysis reveals that the negative relationship between 
  internet penetration and press freedom scores is driven by countries with 
  Partly Free press in 2000, and countries whose Polity scores were less than 6
  in 2000. \\label{table:reg_sub} ")

## Table 4 in the Appendix.
modelsummary(
  list(smp.cntry.2, cntry.2, smp.tw.2, tw.2, lsmp.cntry.2, ldv.cntry.2), 
  escape = F, vcov = "robust", gof_omit = "Std. Errors", stars = star, 
  notes = note, coef_map = c(coefmap, "lrsf" = "Lagged DV"),
  add_rows = as.data.frame(matrix(nrow = 3, byrow = T, c(
    "Country FE", yes, yes, yes, yes, yes, yes, 
    "Year FE", "", "", yes, yes, "", "",
    "Lagged DV", "", "", "", "", yes, yes
  ))),
  title = "The negative association between internet penetration and press 
  freedom is also present when Reporters Without Borders' Press Freedom Index 
  is used. \\label{table:reg_rsf} ")

## Table 5 in the Appendix.
modelsummary(
  list(smp.cntry.3, cntry.3, smp.tw.3, tw.3, lsmp.cntry.3, ldv.cntry.3), 
  escape = F, vcov = "robust", gof_omit = "Std. Errors", stars = star, 
  notes = note,
  coef_map = c(coefmap, "lv2meslfcen" = "Lagged DV"),
  add_rows = as.data.frame(matrix(nrow = 3, byrow = T, c(
    "Country FE", yes, yes, yes, yes, yes, yes, 
    "Year FE", "", "", yes, yes, "", "",
    "Lagged DV", "", "", "", "", yes, yes
  ))),
  title = "The negative association between internet penetration and press 
  freedom is also present when the media self-censorship indicator from 
  Varieties of Democracy is used. \\label{table:reg_vdem} ")

############################      FIGURES      ##############################

library(ggplot2)
library(ggthemes)
library(gridExtra)
library(ggforce)
library(dplyr)

## Function that calculates the mean fotp score in 2000 for each subgroup.
mean_fotp_score <- function(status){
  100 - mean(mer$fotp.2000[mer$sta_2000 == status], na.rm = T)
}

## Code to replicate Figure 1 in the main text.
plot.dat <- ggplot(mer, aes(pen, 100 - fotp, color = factor(
  mer$sta_2000, levels = c("f", "pf", "nf")))) + geom_point() +
  geom_segment(x = 0, xend = 100, y = mean_fotp_score("nf"),
               yend = nf$coefficients[1] * 100 + (mean_fotp_score("nf")),
               color = few_pal("Dark")(3)[1]) + 
  geom_segment(x = 0, xend = 100, y = mean_fotp_score("pf"),
               yend = pf$coefficients[1] * 100 + (mean_fotp_score("pf")),
               color = few_pal("Dark")(3)[2]) + 
  geom_segment(x = 0, xend = 100, y = mean_fotp_score("f"), 
               yend = f$coefficients[1] * 100 + (mean_fotp_score("f")),
               color = few_pal("Dark")(3)[3]) + 
  theme_tufte() + 
  theme(panel.grid = element_blank()) +
  labs(x = "Internet Penetration", y = "Press Freedom", color = 
         "Press Freedom \nstatus in 2000",
       title = "Press Freedom and Internet Penetration, 2000-2015",
       subtitle = 'Data points are country-years') +
  scale_color_manual(labels = c("Free", "Partly Free", "Not Free"), 
                     values = few_pal()(3)[c(3, 2, 1)])


###########################     DESCRIPTIVE    ##############################

## Code to replicate Figure 1 in the Appendix. 
desc.fotp <- ggplot(data = mer, aes(x = as.factor(year), y = 100 - fotp)) + 
  geom_boxplot(outlier.size = .2) + xlab("Year") + 
  ylab("Freedom of the Press") + theme_tufte() 

## Code to replicate Figure 2 in the Appendix. 
desc.pen <- ggplot(data = mer, aes(x = as.factor(year), y = pen)) + 
  geom_boxplot(outlier.size = .2) + xlab("Year") + 
  ylab("Internet penetration") + theme_tufte() 

## Countries with the highest change in fotp scores
mer %>% group_by(countrynames) %>% 
  filter(fotp == min(fotp) | fotp == max(fotp)) %>% 
  summarize(year = year, fotp = fotp, diff = max(fotp) - min(fotp)) %>% 
  filter(diff > 30)

## Countries with the lowest change in fotp scores
mer %>% group_by(countrynames) %>% 
  filter(fotp == min(fotp) | fotp == max(fotp)) %>% 
  summarize(year = year, fotp = fotp, diff = max(fotp) - min(fotp)) %>% 
  filter(diff < 4) %>% print(n = 22)

## Countries with the highest change in pen scores
mer %>% group_by(countrynames) %>% 
  filter(pen == min(pen) | pen == max(pen)) %>% 
  summarize(year = year, pen = pen, diff = max(pen) - min(pen)) %>% 
  filter(diff > 80)


###########################     SIMULATIONS    ##############################

library(LaplacesDemon) # inverse logit for simulations 

### m: penetration
### r: office rents
### b: multiplier on how far voters' beliefs are
### c: size of the small outlet

###### following gives a good presentation of what happens when C1 fails ######
### b <- 1/2
### c <- 0.25
### rrr <- runif(60, min = 0, max = 1)
### mmm <- (rbeta(50 * length(rrr), 1, 5) - .4) * 2             

###### following gives a good presentation of what happens when C1 holds ######
### b <- 2
### c <- 0.25
### rrr <- runif(60, min = 0, max = 1)
### mmm <- rbeta(16 * length(rrr), 1, 8) * .6 - .8            


simulator <- function(C1.holds = T, seed = 08542){
  set.seed(seed)
  
  b <- ifelse(C1.holds == T, 2, 1/2)
  c <- 0.25
  
  ## Probability that a large enough group of UV switch to overturn
  p <- function(m){
    1 - pnorm((log(1 - 2 * c) - m) * b)
  }
  
  ## incumbent's EU of capturing both outlets
  EUCC <- function(r, m){
    r - invlogit(m)
  }
  
  ## incumbent's EU of capturing only M 
  EUPC <- function(r, m){
    r * (1 - p(m)) - c * invlogit(m)
  }
  
  ## Incumbent's best response function: 2 is no capture, 1 partial capture, 
  ## 0 complete capture.
  BR <- function(r, m){
    ifelse(EUCC(r, m) >= EUPC(r, m) & EUCC(r, m) >= 0, 0, 
           ifelse(EUPC(r, m) > EUCC(r, m) & EUPC(r, m) >= 0, 1, 2))
  }
  
  ### Create vectors of office rents and penetration.
  rrr <- runif(160, min = 0, max = 1)
  ifelse(C1.holds == T, mmm <- rbeta(16 * length(rrr), 1, 5) * .6 - .8,
         mmm <- (rbeta(16 * length(rrr), 1, 5) - .4) * 2) 
  
  ## Produce a matrix by applying BR to each combination of mmm and rrr.
  br <- BR(rrr, mmm)
  
  ## Create simulated dataset.
  fakedat <- cbind(data.frame(br), c(rrr), c(mmm))
  colnames(fakedat) <- c('score', 'r', 'mu')
  
  ## Split the simulated dataset into terciles according to office rents.
  fakedat$status <- 'PF'
  fakedat$status[rrr <= quantile(rrr, 1/3)] <- "F"
  fakedat$status[rrr >= quantile(rrr, 2/3)] <- "NF"
  
  ## Plot the simulated dataset.
  plot <- ggplot(
    fakedat, aes(x = mu, y = score, color = 
                   factor(status, levels = c("F", "PF", "NF")))) + 
    geom_point(position = position_jitternormal(sd_x = 0, sd_y = 0.3)) +
    geom_smooth(method = "lm", se = T, level = 0.95) + theme_tufte() + 
    theme(panel.grid = element_blank()) +
    labs(x = "Internet Penetration", y = "Press Freedom",
         title = paste0("Simulation results when Condition 1 ", 
                        ifelse(C1.holds, "holds.", "fails.")),
         subtitle = 'Points are simulated country-years',
         color = "Office rents") +
    scale_x_continuous(breaks = waiver(),
                       labels = c(0, 25, 50, 75, 100), 
                       limit = c(-.8, ifelse(C1.holds, -.4, 0.7))) + 
    scale_y_continuous(breaks = c(-1, 0, 1, 2),
                       labels = c(0, 25, 50, 75), limit = c(-1, 3)) + 
    scale_color_manual(
      labels = c("Low", "Middle", "High"), values = few_pal()(3)[c(3, 2, 1)])
  return(plot)
}

## Run the simulation for when Condition 1 holds. Figure 3 in the main text.
plot.sim.c1.holds <- simulator()

## Run the simulation for when Condition 1 fails. Figure 4 in the main text.
plot.sim.c1.fails <- simulator(F)
